package org.biogroovy.io.uniprot;

import groovy.util.logging.Slf4j

import javax.xml.namespace.QName
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.xpath.XPath
import javax.xml.xpath.XPathConstants
import javax.xml.xpath.XPathFactory

import org.biogroovy.io.AbsSeqReader;
import org.biogroovy.io.IFetcher;
import org.biogroovy.models.AbsSequenceFeature
import org.biogroovy.models.Article
import org.biogroovy.models.Author
import org.biogroovy.models.FeatureType
import org.biogroovy.models.GeneOntology
import org.biogroovy.models.GeneOntologyType
import org.biogroovy.models.Journal
import org.biogroovy.models.Pathway
import org.biogroovy.models.Protein
import org.biogroovy.models.ProteinStructureEntry
import org.biogroovy.models.SequenceFeaturePoint
import org.biogroovy.models.SequenceFeatureRange
import org.w3c.dom.Node

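/**
 * Reads UniProt XML documents and populates {@link Protein} models from them.
 * Documents can be downloaded by identifier or supplied as an input stream;
 * fields are extracted with the XPath expressions held in {@code XPATH_MAP}.
 */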
@Slf4j
public class UniProtReader extends AbsSeqReader<Protein>{

	/** Database name assigned to {@code databaseName} by the constructor. */
	static final String DATABASE_NAME = "uniprot";

	/**
	 * XPath expressions, keyed by field name, used to extract data from a UniProt XML document.
	 * The whole map is handed to {@code parseData}; individual entries (pdb, pdbSum, pathways,
	 * references, articles, goList, evidence, features, keywords) are also read directly by the
	 * dedicated parse methods of this class.
	 * Every expression starts with {@code //}, so it is resolved against the whole document
	 * that owns the context node rather than against the context node itself.
	 */
	static final Map<String, String> XPATH_MAP = [
		uniprotAccession:'//entry/accession[1]',
		sequence:'//entry/sequence',
		uniprotSymbol:'//entry/name',
		name:'//entry/protein/recommendedName/fullName',
		symbol:'//entry/gene/name[@type="primary"]',
		species:'//entry/organism/name[@type="scientific"]',
		synonyms:'//entry/gene/name[@type="synonym"]',
		references:'//entry/dbReference',
		articles:'//entry/reference',
		goList:'//entry/dbReference[@type="GO"]',
		pathways:'//entry/dbReference[@type="Pathway_Interaction_DB"] | //entry/dbReference[@type="Reactome"]',
		features:'//entry/feature',
		evidence:'//entry/evidence[source/dbReference/@type="PubMed"]',
		entrezGeneId:'//entry/dbReference[@type="GeneID"]/@id',
		omimId:'//entry/dbReference[@type="MIM"]/@id',
		unigeneId:'//entry/dbReference[@type="UniGene"]/@id',
		pdb:'//entry/dbReference[@type="PDB"]',
		pdbSum:'//entry/dbReference[@type="PDBsum"]',
		keywords:'//entry/keyword/text()'
	];

	/**
	 * XPath result types ({@link XPathConstants}) for a subset of the fields in {@code XPATH_MAP}.
	 * Passed to {@code parseData} together with {@code XPATH_MAP}.
	 * NOTE(review): fields without an entry here (references, articles, pathways, ...) are
	 * presumably skipped by {@code parseData} and handled by the dedicated parse methods - confirm.
	 */
	static final Map<String, QName> NODE_TYPE_MAP = [
		uniprotAccession:XPathConstants.STRING,
		sequence:XPathConstants.STRING,
		uniprotSymbol:XPathConstants.STRING,
		name:XPathConstants.STRING,
		symbol:XPathConstants.STRING,
		species:XPathConstants.STRING,
		synonyms:XPathConstants.NODESET,
		goList:XPathConstants.NODESET,
		entrezGeneId:XPathConstants.NUMBER,
		omimId:XPathConstants.STRING,
		unigeneId:XPathConstants.STRING,
	]

	/**
	 * XPath used by {@code readList} to select the nodes that are each parsed into a Protein.
	 * NOTE(review): this selects {@code uniprot} elements, not individual {@code entry}
	 * elements - confirm this is the intended granularity for multi-entry documents.
	 */
	static final String ROOT_PATH = "//uniprot";
	
	/**
	 * Creates a reader whose database name is set to {@link #DATABASE_NAME}.
	 */
	public UniProtReader(){
		databaseName = DATABASE_NAME
	}


	/**
	 * Downloads the UniProt XML for the given identifier and parses it into a protein.
	 *
	 * @param id identifier substituted into the URL built by {@link #getUrl(String, Map)}
	 * @return the protein populated from the downloaded document
	 * @throws IOException if the URL cannot be opened or read
	 */
	@Override
	public Protein fetch(String id) throws IOException {
		// withInputStream closes the network stream even when parsing throws
		return getUrl(id, null).withInputStream { InputStream xmlStream -> read(xmlStream) }
	}

	/**
	 * Parses a UniProt XML document from the stream into a single protein.
	 * The stream is not closed by this method.
	 *
	 * @param inputStream stream positioned at the start of an XML document
	 * @return a new protein populated by {@link #parse(Protein, Node)}
	 * @throws IOException if the stream cannot be read
	 */
	@Override
	public Protein read(InputStream inputStream) throws IOException {
		def document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputStream)
		Node documentElement = (Node) document.documentElement

		Protein result = new Protein()
		parse(result, documentElement)
		return result
	}


	/**
	 * Parses an XML document and returns one protein per node matched by {@code ROOT_PATH}.
	 * The stream is not closed by this method.
	 *
	 * NOTE(review): {@code ROOT_PATH} selects {@code uniprot} elements and the expressions in
	 * {@code XPATH_MAP} start with {@code //entry}, so a document holding several entries
	 * appears to be folded into a single protein - confirm whether that is intended.
	 *
	 * @param inputStream stream positioned at the start of an XML document
	 * @return the parsed proteins, in document order; empty if nothing matches
	 * @throws IOException if the stream cannot be read
	 */
	@Override
	public List<Protein> readList(InputStream inputStream) throws IOException {
		def root = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputStream).documentElement

		XPath xpath = XPathFactory.newInstance().newXPath()
		// Fully qualified on purpose: an unqualified NodeList resolves to groovy.util.NodeList
		// through Groovy's default imports, which is not the DOM type XPath returns.
		org.w3c.dom.NodeList matches =
				(org.w3c.dom.NodeList) xpath.evaluate(ROOT_PATH, root, XPathConstants.NODESET)

		List<Protein> protList = new ArrayList<Protein>(matches.getLength())
		for (int i = 0; i < matches.getLength(); i++) {
			Protein prot = new Protein()
			parse(prot, matches.item(i))
			protList.add(prot)
		}
		return protList
	}

	/**
	 * Populates the protein from the given DOM node by running every field-specific parser.
	 *
	 * @param protein the protein to fill in
	 * @param root the context node handed to each XPath evaluation
	 */
	@Override
	public void parse(Protein protein, Node root) {

		parseData(root, protein, XPATH_MAP, NODE_TYPE_MAP);
		parseGo(root, protein);
		parsePathways(root, protein);
		parseArticles(root, protein);
		// Order matters: parseEvidence fills evidenceKeyMap, which parseSequenceFeatures reads
		parseEvidence(root, protein);
		parseSequenceFeatures(root, protein);
		parsePdbEntries(root, protein)
		parseKeywords(root, protein)
		
	}
	
	/**
	 * Adds the value of every node matched by the {@code keywords} XPath to the protein's keywords.
	 *
	 * @param root context node for the XPath evaluation
	 * @param prot protein whose keyword collection is appended to
	 */
	protected void parseKeywords(Object root, Protein prot){
		def keywordXPath = XPathFactory.newInstance().newXPath()
		def textNodes = keywordXPath.evaluate(XPATH_MAP.keywords, root, XPathConstants.NODESET)

		textNodes.each { textNode ->
			def keyword = textNode.getNodeValue()
			prot.keywords.add(keyword)
		}
	}

	/**
	 * Reads the PDB and PDBsum cross-references and stores them on the protein.
	 * PDB entries get method, resolution and chains from their {@code property} children and
	 * go into {@code pdbMap}; PDBsum entries carry only the accession and go into {@code pdbSumMap}.
	 * Both maps are keyed by accession.
	 *
	 * @param root context node for the XPath evaluation
	 * @param sequence protein receiving the structure entries
	 */
	protected void parsePdbEntries(Object root, Protein sequence){
		XPathFactory xpathFactory = XPathFactory.newInstance()
		def refXPath = xpathFactory.newXPath()
		def propertyXPath = xpathFactory.newXPath()

		def pdbRefs = refXPath.evaluate(XPATH_MAP.pdb, root, XPathConstants.NODESET)
		def pdbSumRefs = refXPath.evaluate(XPATH_MAP.pdbSum, root, XPathConstants.NODESET)

		// PDB cross-references: accession plus experimental details
		pdbRefs.each { pdbRef ->
			ProteinStructureEntry entry = new ProteinStructureEntry()
			entry.accession = pdbRef.getAttribute("id")
			entry.structureType = ProteinStructureEntry.StructureType.PDB

			def method = propertyXPath.evaluate('property[@type="method"]/@value', pdbRef, XPathConstants.STRING)
			entry.setExpMethod(method)
			entry.resolution = propertyXPath.evaluate('property[@type="resolution"]/@value', pdbRef, XPathConstants.NUMBER)
			entry.chains = propertyXPath.evaluate('property[@type="chains"]/@value', pdbRef, XPathConstants.STRING)

			sequence.pdbMap.put(entry.accession, entry)
		}

		// PDBsum cross-references: accession only
		pdbSumRefs.each { sumRef ->
			ProteinStructureEntry entry = new ProteinStructureEntry()
			entry.accession = sumRef.getAttribute("id")
			entry.structureType = ProteinStructureEntry.StructureType.PDBSum
			sequence.pdbSumMap.put(entry.accession, entry)
		}
	}


	/**
	 * Extracts the pathway cross-references matched by the {@code pathways} XPath and stores
	 * each one in the protein's {@code pathways}, keyed by its datasource (the {@code type}
	 * attribute).
	 * NOTE(review): if {@code pathways} is a plain map, several pathways from the same
	 * datasource would overwrite each other - confirm the collection type.
	 *
	 * @param root context node for the XPath evaluation
	 * @param sequence protein receiving the pathways
	 */
	protected void parsePathways(Object root, Protein sequence){
		XPathFactory xpathFactory = XPathFactory.newInstance()
		def pathwayXPath = xpathFactory.newXPath()
		def nameXPath = xpathFactory.newXPath()

		def pathwayRefs = pathwayXPath.evaluate(XPATH_MAP.pathways, root, XPathConstants.NODESET)
		pathwayRefs.each { pathwayRef ->
			Pathway pathway = new Pathway()
			pathway.accession = pathwayRef.getAttribute("id")
			pathway.datasource = pathwayRef.getAttribute("type")
			pathway.name = nameXPath.evaluate("./property[@type='pathway name']/@value", pathwayRef, XPathConstants.STRING)

			sequence.pathways.put(pathway.datasource, pathway)
		}
	}

	/**
	 * Stores the database cross-references of the record in the protein's {@code references},
	 * mapping the database name ({@code type} attribute) to the identifier ({@code id} attribute).
	 * References lacking either attribute are skipped.
	 *
	 * The previous implementation looked for {@code Dbtag_db} / {@code Object-id} child
	 * elements, which the {@code dbReference} elements addressed by the other XPaths in this
	 * class (attribute based: {@code @type}, {@code @id}) do not contain, so every reference
	 * was stored as an empty key/value pair.
	 *
	 * NOTE(review): if {@code references} is a plain map, only the last identifier per
	 * database is retained - confirm the collection type.
	 *
	 * @param root context node for the XPath evaluation
	 * @param sequence protein receiving the references
	 */
	@Override
	protected void parseDbReferences(Object root, Protein sequence) {
		def xpath = XPathFactory.newInstance().newXPath()
		def refNodes = xpath.evaluate(XPATH_MAP.references, root, XPathConstants.NODESET)

		refNodes.each { refNode ->
			String database = refNode.getAttribute("type")
			String id = refNode.getAttribute("id")
			// getAttribute returns "" for a missing attribute; do not store blank entries
			if (database && id) {
				sequence.references.put(database, id)
			}
		}
	}

	/**
	 * Parses the {@code reference} elements of the record into articles.
	 * For each reference the title, PubMed id, authors and scope keywords are read; only
	 * references whose citation type is {@code "journal article"} get a journal and are added
	 * to the protein's {@code articles}.
	 *
	 * Author names are split at the first space into last name and initials; names without a
	 * space are ignored.
	 *
	 * @param root context node for the XPath evaluation
	 * @param protein protein receiving the articles
	 */
	@Override
	protected void parseArticles(Object root, Protein protein) {
		XPathFactory factory = XPathFactory.newInstance();
		def xpath = factory.newXPath();
		def articleXPath = factory.newXPath();

		def nodeSet = xpath.evaluate (XPATH_MAP.articles,  root, XPathConstants.NODESET);
		log.debug("nodeSet: " + nodeSet);
		nodeSet.each{ articleNode ->
			Article art = new Article();
			art.title = articleXPath.evaluate("./citation/title", articleNode, XPathConstants.STRING)

			// parse the pubmed id
			art.pubmedId = articleXPath.evaluate("./citation/dbReference[@type='PubMed']/@id", articleNode, XPathConstants.STRING);

			// parse the authors
			def authNodes = articleXPath.evaluate("./citation/authorList/person", articleNode, XPathConstants.NODESET)
			authNodes.each{authNode ->
				String name = authNode.getAttribute("name")

				int spaceIndex = name.indexOf(" ");
				if (spaceIndex != -1){
					Author author = new Author();
					author.lastname = name.substring(0,spaceIndex);
					author.initials = name.substring(spaceIndex+1);
					art.authors.add(author);
				}
			}

			// parse the article 'scope' values as keywords: store the element text,
			// not the DOM node itself
			def scopeNodes = articleXPath.evaluate("./scope", articleNode, XPathConstants.NODESET);
			scopeNodes.each{scopeNode ->
				art.keywords.add(scopeNode.getTextContent());
			}

			// parse the journal
			String type = articleXPath.evaluate("./citation/@type", articleNode, XPathConstants.STRING);

			if(type == "journal article"){
				art.journal = new Journal();
				art.journal.title = articleXPath.evaluate("./citation/@name", articleNode, XPathConstants.STRING);
				art.journal.volume = articleXPath.evaluate("./citation/@volume", articleNode, XPathConstants.STRING)
				// NUMBER evaluation yields NaN when the page attribute is absent or not numeric
				art.journal.startPage = articleXPath.evaluate("./citation/@first", articleNode, XPathConstants.NUMBER);
				art.journal.endPage = articleXPath.evaluate("./citation/@last", articleNode, XPathConstants.NUMBER)

				protein.articles.add(art);
			}
		}
	}

	/**
	 * Parses the GO cross-references and files each term under the protein's function,
	 * process or component list according to the type reported by
	 * {@code GeneOntologyType.identifyType}. Terms of any other type are not stored.
	 *
	 * The numeric GO id is taken from the {@code id} attribute after its first three
	 * characters, and the term name from the {@code term} property after its first two.
	 *
	 * @param root context node for the XPath evaluation
	 * @param sequence protein receiving the GO terms
	 */
	@Override
	protected void parseGo(Object root, Protein sequence) {
		def goXPath = XPathFactory.newInstance().newXPath()

		def goRefs = goXPath.evaluate(XPATH_MAP.goList, root, XPathConstants.NODESET)
		goRefs.each { goRef ->
			GeneOntology term = new GeneOntology()

			// drop the "GO:" prefix before converting to a number
			String rawId = goRef.getAttribute("id")
			term.goId = Integer.parseInt(rawId.substring(3))

			// the term value carries a two-character prefix such as "C:"
			String prefixedName = goXPath.evaluate("./property[@type='term']/@value", goRef, XPathConstants.STRING)
			term.name = prefixedName.substring(2)
			term.type = GeneOntologyType.identifyType(prefixedName)

			def targetList = null
			if (term.type == GeneOntologyType.FUNCTION) {
				targetList = sequence.goFunctionList
			} else if (term.type == GeneOntologyType.PROCESS) {
				targetList = sequence.goProcessList
			} else if (term.type == GeneOntologyType.COMPONENT) {
				targetList = sequence.goComponentList
			}

			if (targetList != null) {
				targetList.add(term)
			}
		}
	}

	/**
	 * Builds the protein's {@code evidenceKeyMap}: for every evidence element matched by the
	 * {@code evidence} XPath, an article carrying the PubMed id of the first
	 * {@code source/dbReference} is stored under the evidence's {@code key} attribute.
	 *
	 * @param root context node for the XPath evaluation
	 * @param sequence protein whose evidence map is filled
	 */
	protected void parseEvidence(Object root, Protein sequence){
		XPathFactory xpathFactory = XPathFactory.newInstance()
		def evidenceXPath = xpathFactory.newXPath()
		def sourceXPath = xpathFactory.newXPath()

		def evidenceNodes = evidenceXPath.evaluate(XPATH_MAP.evidence, root, XPathConstants.NODESET)
		evidenceNodes.each { evidence ->
			def sourceRef = sourceXPath.evaluate("source/dbReference", evidence, XPathConstants.NODE)

			Article cited = new Article()
			cited.pubmedId = sourceRef.getAttribute("id")

			String evidenceKey = evidence.getAttribute("key")
			sequence.evidenceKeyMap.put(evidenceKey, cited)
		}
	}

	/**
	 * Parses the {@code feature} elements of the record and adds them to the protein.
	 * Features whose {@code FeatureType} has the type string {@code "REGION"} become
	 * {@link SequenceFeatureRange}s with start and end positions; all others become
	 * {@link SequenceFeaturePoint}s with a single location.
	 * Evidence keys listed in the feature's {@code evidence} attribute are resolved through
	 * the protein's {@code evidenceKeyMap}, so that map must already be populated.
	 *
	 * The location XPaths are relative and are evaluated against the feature node itself;
	 * evaluating them against {@code root} (as was done previously) never matches, because
	 * {@code location} is a child of {@code feature}.
	 *
	 * @param root context node for the XPath evaluation
	 * @param sequence protein receiving the features
	 */
	protected void parseSequenceFeatures(Object root, Protein sequence){
		XPathFactory factory = XPathFactory.newInstance();
		def xpath = factory.newXPath();
		def locationXPath = factory.newXPath();

		def nodeSet = xpath.evaluate (XPATH_MAP.features,  root, XPathConstants.NODESET);
		nodeSet.each{ node ->

			// use the feature type information to create the appropriate feature
			String typeStr = node.getAttribute("type");
			FeatureType type = FeatureType.getTypeFromString(typeStr);
			AbsSequenceFeature feature = null;

			if (type.type.equals("REGION")){
				feature = new SequenceFeatureRange();
				feature.start = locationXPath.evaluate("location/begin/@position", node, XPathConstants.NUMBER)
				feature.end = locationXPath.evaluate("location/end/@position", node, XPathConstants.NUMBER);
			}else{
				feature = new SequenceFeaturePoint();
				feature.location = locationXPath.evaluate("location/position/@position", node, XPathConstants.NUMBER);
			}

			feature.type = type;
			feature.description = node.getAttribute("description");

			// the evidence attribute is a space separated list of evidence keys;
			// keys without a matching article are ignored
			String[] evidenceIds = node.getAttribute("evidence")?.split(" ");
			if (evidenceIds != null){
				evidenceIds.each{evidenceId ->
					Article article = sequence.evidenceKeyMap.get(evidenceId);
					if (article != null){
						feature.evidence.add(article);
					}
				}
			}

			sequence.addFeature(feature);
		}
	}



	/**
	 * Downloads the UniProt XML for the given identifier and parses it with
	 * {@link #readList(InputStream)}.
	 *
	 * @param id identifier substituted into the URL built by {@link #getUrl(String, Map)}
	 * @return the proteins parsed from the downloaded document
	 * @throws IOException if the URL cannot be opened or read
	 */
	@Override
	public List<Protein> fetchAll(String id) throws IOException {
		URL url = getUrl(id, null);
		// withInputStream closes the network stream even when parsing throws
		return url.withInputStream { InputStream xmlStream -> readList(xmlStream) }
	}

	/**
	 * Builds the download URL of the XML record for the given identifier.
	 *
	 * @param id identifier inserted into the URL path
	 * @param paramMap not used by this implementation
	 * @return the URL of the record's XML representation
	 */
	@Override
	public URL getUrl(String id, Map<String, String> paramMap) {
		String address = "http://www.uniprot.org/uniprot/${id}.xml"
		return new URL(address)
	}


	/**
	 * Creates a fresh, independent reader.
	 *
	 * @return a new {@code UniProtReader}
	 */
	@Override
	public IFetcher<Protein> getNewInstance() {
		IFetcher<Protein> freshReader = new UniProtReader()
		return freshReader
	}


}
